home *** CD-ROM | disk | FTP | other *** search
Text File | 1994-08-02 | 70.8 KB | 1,058 lines |
-
- PFA/SGI 10.0 k092805 910529 _MAIN Source 10-Aug-1993 10:08:55 Page 1
-
- Footnotes Actions DO Loops Line
-
- DIR 1 # 1 "linpackd.f"
- 2 *
- 3 *PLEASE NOTE THAT netlib HAS MOVED, THE NEW ADDRESS IS netlib@ornl.gov.
- 4 *THE OLD ADDRESS, netlib@mcs.anl.gov, WILL BE TURNED OFF SOON.
- 5 *
- 6 *** from netlib, Fri Jul 27 14:07:10 EDT 1990 ***
- 7 double precision second
- 8 double precision aa(200,200),a(201,200),b(200),x(200)
- 9 double precision time(8,6),cray,ops,total,norma,normx
- 10 double precision resid,residn,eps,epslon
- 11 integer ipvt(200)
- 12 lda = 201
- 13 ldaa = 200
- 14 c
- 15 n = 100
- 16 cray = .056
- 17 write(6,1)
- 18 1 format(' Please send the results of this run to:'//
- 19 $ ' Jack J. Dongarra'/
- 20 $ ' Computer Science Department'/
- 21 $ ' University of Tennessee'/
- 22 $ ' Knoxville, Tennessee 37996-1300'//
- 23 $ ' Fax: 615-974-8296'//
- 24 $ ' Internet: dongarra@cs.utk.edu'/)
- SO 25 ops = (2.0d0*n**3)/3.0d0 + 2.0d0*n**2
- 26 c
- 27 call matgen(a,lda,n,b,norma)
- 28 t1 = second()
- 29 call dgefa(a,lda,n,ipvt,info)
- 30 time(1,1) = second() - t1
- 31 t1 = second()
- 32 call dgesl(a,lda,n,ipvt,b,0)
- 33 time(1,2) = second() - t1
- 34 total = time(1,1) + time(1,2)
- 35 c
- 36 c compute a residual to verify results.
- 37 c
- 1 2 SO C +--------- 38 do 10 i = 1,n
- * 39 x(i) = b(i)
- *_________ 40 10 continue
- 41 call matgen(a,lda,n,b,norma)
- 1 2 SO C +--------- 42 do 20 i = 1,n
- SO * 43 b(i) = -b(i)
- *_________ 44 20 continue
- 45 call dmxpy(n,b,n,lda,x,a)
- 46 resid = 0.0
- 47 normx = 0.0
- 1 2 SO +--------- 48 do 30 i = 1,n
- 3 DD ! 49 resid = dmax1( resid, dabs(b(i)) )
- 4 DD ! 50 normx = dmax1( normx, dabs(x(i)) )
- !_________ 51 30 continue
-
- PFA/SGI 10.0 k092805 910529 _MAIN Source 10-Aug-1993 10:08:55 Page 2
-
- 52 eps = epslon(1.0d0)
- 53 residn = resid/( n*norma*normx*eps )
- 54 write(6,40)
- 55 40 format(' norm. resid resid machep',
- 56 $ ' x(1) x(n)')
- 57 write(6,50) residn,resid,eps,x(1),x(n)
- 58 50 format(1p5e16.8)
- 59 c
- 60 write(6,60) n
- 61 60 format(//' times are reported for matrices of order ',i5)
- 62 write(6,70)
- 63 70 format(6x,'dgefa',6x,'dgesl',6x,'total',5x,'mflops',7x,'unit',
- 64 $ 6x,'ratio')
- 65 c
- 66 time(1,3) = total
- 67 time(1,4) = ops/(1.0d6*total)
- 68 time(1,5) = 2.0d0/time(1,4)
- 69 time(1,6) = total/cray
- 70 write(6,80) lda
- 71 80 format(' times for array with leading dimension of',i4)
- 72 write(6,110) (time(1,i),i=1,6)
- 73 c
- 74 call matgen(a,lda,n,b,norma)
- 75 t1 = second()
- 76 call dgefa(a,lda,n,ipvt,info)
- 77 time(2,1) = second() - t1
- 78 t1 = second()
- 79 call dgesl(a,lda,n,ipvt,b,0)
- 80 time(2,2) = second() - t1
- 81 total = time(2,1) + time(2,2)
- 82 time(2,3) = total
- 83 time(2,4) = ops/(1.0d6*total)
- 84 time(2,5) = 2.0d0/time(2,4)
- 85 time(2,6) = total/cray
- 86 c
- 87 call matgen(a,lda,n,b,norma)
- 88 t1 = second()
- 89 call dgefa(a,lda,n,ipvt,info)
- 90 time(3,1) = second() - t1
- 91 t1 = second()
- 92 call dgesl(a,lda,n,ipvt,b,0)
- 93 time(3,2) = second() - t1
- 94 total = time(3,1) + time(3,2)
- 95 time(3,3) = total
- 96 time(3,4) = ops/(1.0d6*total)
- 97 time(3,5) = 2.0d0/time(3,4)
- 98 time(3,6) = total/cray
- 99 c
- 5 SO 100 ntimes = 10
- 101 tm2 = 0
- 102 t1 = second()
- SO +--------- 103 do 90 i = 1,ntimes
- 6 NO NCS ! 104 tm = second()
-
- PFA/SGI 10.0 k092805 910529 _MAIN Source 10-Aug-1993 10:08:55 Page 3
-
- 7 8 9 10
- 11 12 NO DD NCS ! 105 call matgen(a,lda,n,b,norma)
- 6 13 NO DD NCS ! 106 tm2 = tm2 + second() - tm
- 7 8 9 14
- 15 16 NO DD NCS ! 107 call dgefa(a,lda,n,ipvt,info)
- !_________ 108 90 continue
- SO 109 time(4,1) = (second() - t1 - tm2)/ntimes
- 110 t1 = second()
- SO +--------- 111 do 100 i = 1,ntimes
- 7 8 9 11
- 15 17 NO DD NCS ! 112 call dgesl(a,lda,n,ipvt,b,0)
- !_________ 113 100 continue
- SO 114 time(4,2) = (second() - t1)/ntimes
- 115 total = time(4,1) + time(4,2)
- 116 time(4,3) = total
- 117 time(4,4) = ops/(1.0d6*total)
- 118 time(4,5) = 2.0d0/time(4,4)
- 119 time(4,6) = total/cray
- 120 c
- 121 write(6,110) (time(2,i),i=1,6)
- 122 write(6,110) (time(3,i),i=1,6)
- 123 write(6,110) (time(4,i),i=1,6)
- 124 110 format(6(1pe11.3))
- 125 c
- 126 call matgen(aa,ldaa,n,b,norma)
- 127 t1 = second()
- 128 call dgefa(aa,ldaa,n,ipvt,info)
- 129 time(5,1) = second() - t1
- 130 t1 = second()
- 131 call dgesl(aa,ldaa,n,ipvt,b,0)
- 132 time(5,2) = second() - t1
- 133 total = time(5,1) + time(5,2)
- 134 time(5,3) = total
- 135 time(5,4) = ops/(1.0d6*total)
- 136 time(5,5) = 2.0d0/time(5,4)
- 137 time(5,6) = total/cray
- 138 c
- 139 call matgen(aa,ldaa,n,b,norma)
- 140 t1 = second()
- 141 call dgefa(aa,ldaa,n,ipvt,info)
- 142 time(6,1) = second() - t1
- 143 t1 = second()
- 144 call dgesl(aa,ldaa,n,ipvt,b,0)
- 145 time(6,2) = second() - t1
- 146 total = time(6,1) + time(6,2)
- 147 time(6,3) = total
- 148 time(6,4) = ops/(1.0d6*total)
- 149 time(6,5) = 2.0d0/time(6,4)
- 150 time(6,6) = total/cray
- 151 c
- 152 call matgen(aa,ldaa,n,b,norma)
- 153 t1 = second()
- 154 call dgefa(aa,ldaa,n,ipvt,info)
-
- PFA/SGI 10.0 k092805 910529 _MAIN Source 10-Aug-1993 10:08:55 Page 4
-
- 155 time(7,1) = second() - t1
- 156 t1 = second()
- 157 call dgesl(aa,ldaa,n,ipvt,b,0)
- 158 time(7,2) = second() - t1
- 159 total = time(7,1) + time(7,2)
- 160 time(7,3) = total
- 161 time(7,4) = ops/(1.0d6*total)
- 162 time(7,5) = 2.0d0/time(7,4)
- 163 time(7,6) = total/cray
- 164 c
- 5 SO 165 ntimes = 10
- 166 tm2 = 0
- 167 t1 = second()
- SO +--------- 168 do 120 i = 1,ntimes
- 6 NO NCS ! 169 tm = second()
- 7 10 11 12
- 18 19 NO DD NCS ! 170 call matgen(aa,ldaa,n,b,norma)
- 6 13 NO DD NCS ! 171 tm2 = tm2 + second() - tm
- 7 14 15 16
- 18 19 NO DD NCS ! 172 call dgefa(aa,ldaa,n,ipvt,info)
- !_________ 173 120 continue
- SO 174 time(8,1) = (second() - t1 - tm2)/ntimes
- 175 t1 = second()
- SO +--------- 176 do 130 i = 1,ntimes
- 7 11 15 17
- 18 19 NO DD NCS ! 177 call dgesl(aa,ldaa,n,ipvt,b,0)
- !_________ 178 130 continue
- SO 179 time(8,2) = (second() - t1)/ntimes
- 180 total = time(8,1) + time(8,2)
- 181 time(8,3) = total
- 182 time(8,4) = ops/(1.0d6*total)
- 183 time(8,5) = 2.0d0/time(8,4)
- 184 time(8,6) = total/cray
- 185 c
- 186 write(6,140) ldaa
- 187 140 format(/' times for array with leading dimension of',i4)
- 188 write(6,110) (time(5,i),i=1,6)
- 189 write(6,110) (time(6,i),i=1,6)
- 190 write(6,110) (time(7,i),i=1,6)
- 191 write(6,110) (time(8,i),i=1,6)
- 192 stop
- 193 end
-
-
- Abbreviations Used
- NO not optimized
- DD data dependence
- SO scalar optimization
- DIR directive
- NCS non-concurrent-stmt
- C concurrentized
-
-
- Footnote List
-
- PFA/SGI 10.0 k092805 910529 _MAIN Source 10-Aug-1993 10:08:55 Page 5
-
- 1: scalar optimization Loop unrolled 4 times to improve scalar performance.
- 2: scalar optimization Cleanup loop for loop unrolling.
- 3: data dependence Data dependence involving this line due to variable "RESID".
- 4: data dependence Data dependence involving this line due to variable "NORMX".
- 5: scalar optimization Statement deleted because of scalar optimization.
- 6: not optimized Unoptimizable call to "SECOND" found.
- 7: data dependence Data dependence involving this line due to variable "N".
- 8: data dependence Data dependence involving this line due to variable "LDA".
- 9: data dependence Data dependence involving this line due to variable "A".
- 10: data dependence Data dependence involving this line due to variable "NORMA".
- 11: data dependence Data dependence involving this line due to variable "B".
- 12: not optimized Unoptimizable call to "MATGEN" found.
- 13: data dependence Data dependence involving this line due to variable "TM2".
- 14: data dependence Data dependence involving this line due to variable "INFO".
- 15: data dependence Data dependence involving this line due to variable "IPVT".
- 16: not optimized Unoptimizable call to "DGEFA" found.
- 17: not optimized Unoptimizable call to "DGESL" found.
- 18: data dependence Data dependence involving this line due to variable "LDAA".
- 19: data dependence Data dependence involving this line due to variable "AA".
-
- PFA/SGI 10.0 k092805 910529 _MAIN Loop Summary 10-Aug-1993 10:08:55 Page 6
-
-
- Loop Summary
-
- From To Loop Loop at Unroll Unroll Iteration
- Loop# line line label index nest weight factor workload Status
- 1 38 40 Do 10 I 1 3 4 scalar mode preferable
- 2 38 40 Do 10 I 1 3 4 3 concurrentized
- 3 42 44 Do 20 I 1 4 4 scalar mode preferable
- 4 42 44 Do 20 I 1 4 4 4 concurrentized
- 5 48 51 Do 30 I 1 12 4 unrolled
- 6 48 51 Do 30 I 1 12 4 unrolled
- 7 103 108 Do 90 I 1 204 1 unoptimizable call (DGEFA)
- 8 111 113 Do 100 I 1 50 1 unoptimizable call (DGESL)
- 9 168 173 Do 120 I 1 204 1 unoptimizable call (DGEFA)
- 10 176 178 Do 130 I 1 50 1 unoptimizable call (DGESL)
-
- PFA/SGI 10.0 k092805 910529 MATGEN Source 10-Aug-1993 10:08:55 Page 7
-
- Footnotes Actions DO Loops Line
-
- 194 subroutine matgen(a,lda,n,b,norma)
- 195 double precision a(lda,1),b(1),norma
- 196 c
- 197 init = 1325
- 198 norma = 0.0
- 1 OPT +--------- 199 do 30 j = 1,n
- 2 3 SO *+-------- 200 do 20 i = 1,n
- 4 DD *! 201 init = mod(3125*init,65536)
- 4 DD *! 202 a(i,j) = (init - 32768.0)/16384.0
- 5 DD *! 203 norma = dmax1(dabs(a(i,j)), norma)
- *!________ 204 20 continue
- *_________ 205 30 continue
- 1 OPT +--------- 206 do 35 i = 1,n
- ! 207 b(i) = 0.0
- !_________ 208 35 continue
- 2 3 LR SO +--------- 209 do 50 j = 1,n
- LR SO C !+-------- 210 do 40 i = 1,n
- 6 DD !* 211 b(i) = b(i) + a(i,j)
- !*________ 212 40 continue
- !_________ 213 50 continue
- 214 return
- 215 end
-
-
- Abbreviations Used
- OPT optimized
- LR loop reordering
- DD data dependence
- SO scalar optimization
- C concurrentized
-
-
- Footnote List
- 1: optimized Loop has been fused with others to reduce overhead.
- 2: scalar optimization Loop unrolled 4 times to improve scalar performance.
- 3: scalar optimization Cleanup loop for loop unrolling.
- 4: data dependence Data dependence involving this line due to variable "INIT".
- 5: data dependence Data dependence involving this line due to variable "NORMA".
- 6: data dependence Data dependence involving this line due to variable "B".
-
- PFA/SGI 10.0 k092805 910529 MATGEN Loop Summary 10-Aug-1993 10:08:55 Page 8
-
-
- Loop Summary
-
- From To Loop Loop at Unroll Unroll Iteration
- Loop# line line label index nest weight factor workload Status
- 1 199 208 Do 30 J 2 2 4 scalar mode preferable
- 2 200 204 Do 20 I 3 16 4 unrolled
- 3 200 204 Do 20 I 3 16 4 unrolled
- 4 199 208 Do 30 J 2 2 4 scalar mode preferable
- 5 199 208 Do 30 J 2 2 4 2 concurrentized
- 6 210 212 Do 40 I 1 6 concurrentized
- 7 209 213 Do 50 J 2 4 4 unrolled; already in a parallel loop
- 8 209 213 Do 50 J 2 4 4 unrolled; already in a parallel loop
- 9 206 208 Do 35 I 1 unrolled completely or removed
-
- PFA/SGI 10.0 k092805 910529 DGEFA Source 10-Aug-1993 10:08:55 Page 9
-
- Footnotes Actions DO Loops Line
-
- 216 subroutine dgefa(a,lda,n,ipvt,info)
- 217 integer lda,n,ipvt(1),info
- 218 double precision a(lda,1)
- 219 c
- 220 c dgefa factors a double precision matrix by gaussian elimination.
- 221 c
- 222 c dgefa is usually called by dgeco, but it can be called
- 223 c directly with a saving in time if rcond is not needed.
- 224 c (time for dgeco) = (1 + 9/n)*(time for dgefa) .
- 225 c
- 226 c on entry
- 227 c
- 228 c a double precision(lda, n)
- 229 c the matrix to be factored.
- 230 c
- 231 c lda integer
- 232 c the leading dimension of the array a .
- 233 c
- 234 c n integer
- 235 c the order of the matrix a .
- 236 c
- 237 c on return
- 238 c
- 239 c a an upper triangular matrix and the multipliers
- 240 c which were used to obtain it.
- 241 c the factorization can be written a = l*u where
- 242 c l is a product of permutation and unit lower
- 243 c triangular matrices and u is upper triangular.
- 244 c
- 245 c ipvt integer(n)
- 246 c an integer vector of pivot indices.
- 247 c
- 248 c info integer
- 249 c = 0 normal value.
- 250 c = k if u(k,k) .eq. 0.0 . this is not an error
- 251 c condition for this subroutine, but it does
- 252 c indicate that dgesl or dgedi will divide by zero
- 253 c if called. use rcond in dgeco for a reliable
- 254 c indication of singularity.
- 255 c
- 256 c linpack. this version dated 08/14/78 .
- 257 c cleve moler, university of new mexico, argonne national lab.
- 258 c
- 259 c subroutines and functions
- 260 c
- 261 c blas daxpy,dscal,idamax
- 262 c
- 263 c internal variables
- 264 c
- 265 double precision t
- 266 integer idamax,j,k,kp1,l,nm1
-
- PFA/SGI 10.0 k092805 910529 DGEFA Source 10-Aug-1993 10:08:55 Page 10
-
- 267 c
- 268 c
- 269 c gaussian elimination with partial pivoting
- 270 c
- 271 info = 0
- 1 SO 272 nm1 = n - 1
- SO 273 if (nm1 .lt. 1) go to 70
- 2 NO SO NCS +--------- 274 do 60 k = 1, nm1
- 1 SO ! 275 kp1 = k + 1
- ! 276 c
- ! 277 c find l = pivot index
- ! 278 c
- ! 279 l = idamax(n-k+1,a(k,k),1) + k - 1
- ! 280 ipvt(k) = l
- ! 281 c
- ! 282 c zero pivot implies this column already triangularized
- ! 283 c
- ! 284 if (a(l,k) .eq. 0.0d0) go to 40
- ! 285 c
- ! 286 c interchange if necessary
- ! 287 c
- SO ! 288 if (l .eq. k) go to 10
- ! 289 t = a(l,k)
- ! 290 a(l,k) = a(k,k)
- ! 291 a(k,k) = t
- ! 292 10 continue
- ! 293 c
- ! 294 c compute multipliers
- ! 295 c
- SO ! 296 t = -1.0d0/a(k,k)
- ! 297 call dscal(n-k,t,a(k+1,k),1)
- ! 298 c
- ! 299 c row elimination with column indexing
- ! 300 c
- SO NCS !+-------- 301 do 30 j = kp1, n
- !! 302 t = a(l,j)
- SO !! 303 if (l .eq. k) go to 20
- !! 304 a(l,j) = a(k,j)
- !! 305 a(k,j) = t
- !! 306 20 continue
- 3 NO NCS !! 307 call daxpy(n-k,t,a(k+1,k),1,a(k+1,j),1)
- 3 NO !!________ 308 30 continue
- ! 309 go to 50
- ! 310 40 continue
- ! 311 info = k
- ! 312 50 continue
- !_________ 313 60 continue
- 314 70 continue
- 315 ipvt(n) = n
- 316 if (a(n,n) .eq. 0.0d0) info = n
- 317 return
- 318 end
-
- PFA/SGI 10.0 k092805 910529 DGEFA Source 10-Aug-1993 10:08:55 Page 11
-
-
-
- Abbreviations Used
- NO not optimized
- SO scalar optimization
- NCS non-concurrent-stmt
-
-
- Footnote List
- 1: scalar optimization Statement deleted because of scalar optimization.
- 2: not optimized No optimizable statements found.
- 3: not optimized Unoptimizable call to "DAXPY" found.
-
- PFA/SGI 10.0 k092805 910529 DGEFA Loop Summary 10-Aug-1993 10:08:55 Page 12
-
-
- Loop Summary
-
- From To Loop Loop at Unroll Unroll Iteration
- Loop# line line label index nest weight factor workload Status
- 1 274 313 Do 60 K 1 no optimizable statements
- 2 301 308 Do 30 J 2 53 1 unrolled
- 3 303 303 Do 30 J 2 53 1 unrolled
-
- PFA/SGI 10.0 k092805 910529 DGESL Source 10-Aug-1993 10:08:55 Page 13
-
- Footnotes Actions DO Loops Line
-
- 319 subroutine dgesl(a,lda,n,ipvt,b,job)
- 320 integer lda,n,ipvt(1),job
- 321 double precision a(lda,1),b(1)
- 322 c
- 323 c dgesl solves the double precision system
- 324 c a * x = b or trans(a) * x = b
- 325 c using the factors computed by dgeco or dgefa.
- 326 c
- 327 c on entry
- 328 c
- 329 c a double precision(lda, n)
- 330 c the output from dgeco or dgefa.
- 331 c
- 332 c lda integer
- 333 c the leading dimension of the array a .
- 334 c
- 335 c n integer
- 336 c the order of the matrix a .
- 337 c
- 338 c ipvt integer(n)
- 339 c the pivot vector from dgeco or dgefa.
- 340 c
- 341 c b double precision(n)
- 342 c the right hand side vector.
- 343 c
- 344 c job integer
- 345 c = 0 to solve a*x = b ,
- 346 c = nonzero to solve trans(a)*x = b where
- 347 c trans(a) is the transpose.
- 348 c
- 349 c on return
- 350 c
- 351 c b the solution vector x .
- 352 c
- 353 c error condition
- 354 c
- 355 c a division by zero will occur if the input factor contains a
- 356 c zero on the diagonal. technically this indicates singularity
- 357 c but it is often caused by improper arguments or improper
- 358 c setting of lda . it will not occur if the subroutines are
- 359 c called correctly and if dgeco has set rcond .gt. 0.0
- 360 c or dgefa has set info .eq. 0 .
- 361 c
- 362 c to compute inverse(a) * c where c is a matrix
- 363 c with p columns
- 364 c call dgeco(a,lda,n,ipvt,rcond,z)
- 365 c if (rcond is too small) go to ...
- 366 c do 10 j = 1, p
- 367 c call dgesl(a,lda,n,ipvt,c(1,j),0)
- 368 c 10 continue
- 369 c
-
- PFA/SGI 10.0 k092805 910529 DGESL Source 10-Aug-1993 10:08:55 Page 14
-
- 370 c linpack. this version dated 08/14/78 .
- 371 c cleve moler, university of new mexico, argonne national lab.
- 372 c
- 373 c subroutines and functions
- 374 c
- 375 c blas daxpy,ddot
- 376 c
- 377 c internal variables
- 378 c
- 379 double precision ddot,t
- 380 integer k,kb,l,nm1
- 381 c
- 1 SO 382 nm1 = n - 1
- SO 383 if (job .ne. 0) go to 50
- 384 c
- 385 c job = 0 , solve a * x = b
- 386 c first solve l*y = b
- 387 c
- SO 388 if (nm1 .lt. 1) go to 30
- SO NCS +--------- 389 do 20 k = 1, nm1
- 1 SO ! 390 l = ipvt(k)
- SO ! 391 t = b(l)
- SO ! 392 if (l .eq. k) go to 10
- SO ! 393 b(l) = b(k)
- ! 394 b(k) = t
- ! 395 10 continue
- 2 NO NCS ! 396 call daxpy(n-k,t,a(k+1,k),1,b(k+1),1)
- 2 NO !_________ 397 20 continue
- 398 30 continue
- 399 c
- 400 c now solve u*x = y
- 401 c
- NCS +--------- 402 do 40 kb = 1, n
- ! 403 k = n + 1 - kb
- ! 404 b(k) = b(k)/a(k,k)
- SO ! 405 t = -b(k)
- 2 NO NCS ! 406 call daxpy(k-1,t,a(1,k),1,b(1),1)
- 2 NO !_________ 407 40 continue
- 408 go to 100
- 409 50 continue
- 410 c
- 411 c job = nonzero, solve trans(a) * x = b
- 412 c first solve trans(u)*y = b
- 413 c
- NCS +--------- 414 do 60 k = 1, n
- 3 NO NCS ! 415 t = ddot(k-1,a(1,k),1,b(1),1)
- ! 416 b(k) = (b(k) - t)/a(k,k)
- 3 NO !_________ 417 60 continue
- 418 c
- 419 c now solve trans(l)*x = y
- 420 c
- SO 421 if (nm1 .lt. 1) go to 90
- SO NCS +--------- 422 do 80 kb = 1, nm1
-
- PFA/SGI 10.0 k092805 910529 DGESL Source 10-Aug-1993 10:08:55 Page 15
-
- 1 SO ! 423 k = n - kb
- 3 NO SO NCS ! 424 b(k) = b(k) + ddot(n-k,a(k+1,k),1,b(k+1),1)
- 1 SO ! 425 l = ipvt(k)
- SO ! 426 if (l .eq. k) go to 70
- SO ! 427 t = b(l)
- SO ! 428 b(l) = b(k)
- SO ! 429 b(k) = t
- ! 430 70 continue
- 3 NO !_________ 431 80 continue
- 432 90 continue
- 433 100 continue
- 434 return
- 435 end
-
-
- Abbreviations Used
- NO not optimized
- SO scalar optimization
- NCS non-concurrent-stmt
-
-
- Footnote List
- 1: scalar optimization Statement deleted because of scalar optimization.
- 2: not optimized Unoptimizable call to "DAXPY" found.
- 3: not optimized Unoptimizable call to "DDOT" found.
-
- PFA/SGI 10.0 k092805 910529 DGESL Loop Summary 10-Aug-1993 10:08:55 Page 16
-
-
- Loop Summary
-
- From To Loop Loop at Unroll Unroll Iteration
- Loop# line line label index nest weight factor workload Status
- 1 389 397 Do 20 K 1 62 1 unoptimizable call (DAXPY)
- 2 402 407 Do 40 KB 1 62 1 unoptimizable call (DAXPY)
- 3 414 417 Do 60 K 1 58 1 unoptimizable call (DDOT)
- 4 422 431 Do 80 KB 1 74 1 unoptimizable call (DDOT)
-
- PFA/SGI 10.0 k092805 910529 DAXPY Source 10-Aug-1993 10:08:55 Page 17
-
- Footnotes Actions DO Loops Line
-
- 436 subroutine daxpy(n,da,dx,incx,dy,incy)
- 437 c
- 438 c constant times a vector plus a vector.
- 439 c jack dongarra, linpack, 3/11/78.
- 440 c
- 441 double precision dx(1),dy(1),da
- 442 integer i,incx,incy,ix,iy,m,mp1,n
- 443 c
- SO 444 if(n.le.0)return
- 445 if (da .eq. 0.0d0) return
- SO 446 if(incx.eq.1.and.incy.eq.1)go to 20
- 447 c
- 448 c code for unequal increments or equal increments
- 449 c not equal to 1
- 450 c
- 451 ix = 1
- 452 iy = 1
- SO 453 if(incx.lt.0)ix = (-n+1)*incx + 1
- SO 454 if(incy.lt.0)iy = (-n+1)*incy + 1
- 1 2 3 Q SO C +--------- 455 do 10 i = 1,n
- 4 DD * 456 dy(iy) = dy(iy) + da*dx(ix)
- * 457 ix = ix + incx
- * 458 iy = iy + incy
- *_________ 459 10 continue
- 460 return
- 461 c
- 462 c code for both increments equal to 1
- 463 c
- 464 20 continue
- 1 2 SO C +--------- 465 do 30 i = 1,n
- * 466 dy(i) = dy(i) + da*dx(i)
- *_________ 467 30 continue
- 468 return
- 469 end
-
-
- Abbreviations Used
- DD data dependence
- Q question
- SO scalar optimization
- C concurrentized
-
-
- Footnote List
- 1: scalar optimization Loop unrolled 4 times to improve scalar performance.
- 2: scalar optimization Cleanup loop for loop unrolling.
- 3: question Is "INCY .EQ. 0" in the loop beginning at this statement?
- 4: data dependence Data dependence involving this line due to variable "DY".
-
- PFA/SGI 10.0 k092805 910529 DAXPY Loop Summary 10-Aug-1993 10:08:55 Page 18
-
-
- Loop Summary
-
- From To Loop Loop at Unroll Unroll Iteration
- Loop# line line label index nest weight factor workload Status
- 1 455 459 Do 10 I 1 11 4 scalar mode preferable
- 2 455 459 Do 10 I 1 11 4 scalar mode preferable
- 3 455 459 Do 10 I 1 11 4 scalar mode preferable
- 4 455 459 Do 10 I 1 11 4 20 concurrentized
- 5 465 467 Do 30 I 1 6 4 scalar mode preferable
- 6 465 467 Do 30 I 1 6 4 6 concurrentized
-
- PFA/SGI 10.0 k092805 910529 DDOT Source 10-Aug-1993 10:08:55 Page 19
-
- Footnotes Actions DO Loops Line
-
- 470 double precision function ddot(n,dx,incx,dy,incy)
- 471 c
- 472 c forms the dot product of two vectors.
- 473 c jack dongarra, linpack, 3/11/78.
- 474 c
- 475 double precision dx(1),dy(1),dtemp
- 476 integer i,incx,incy,ix,iy,m,mp1,n
- 477 c
- 478 ddot = 0.0d0
- 479 dtemp = 0.0d0
- SO 480 if(n.le.0)return
- SO 481 if(incx.eq.1.and.incy.eq.1)go to 20
- 482 c
- 483 c code for unequal increments or equal increments
- 484 c not equal to 1
- 485 c
- 486 ix = 1
- 487 iy = 1
- SO 488 if(incx.lt.0)ix = (-n+1)*incx + 1
- SO 489 if(incy.lt.0)iy = (-n+1)*incy + 1
- 1 2 SO +--------- 490 do 10 i = 1,n
- 3 DD ! 491 dtemp = dtemp + dx(ix)*dy(iy)
- ! 492 ix = ix + incx
- ! 493 iy = iy + incy
- !_________ 494 10 continue
- 495 ddot = dtemp
- 496 return
- 497 c
- 498 c code for both increments equal to 1
- 499 c
- 500 20 continue
- 1 2 SO +--------- 501 do 30 i = 1,n
- 3 DD ! 502 dtemp = dtemp + dx(i)*dy(i)
- !_________ 503 30 continue
- 504 ddot = dtemp
- 505 return
- 506 end
-
-
- Abbreviations Used
- DD data dependence
- SO scalar optimization
-
-
- Footnote List
- 1: scalar optimization Loop unrolled 4 times to improve scalar performance.
- 2: scalar optimization Cleanup loop for loop unrolling.
- 3: data dependence Data dependence involving this line due to variable "DTEMP".
-
- PFA/SGI 10.0 k092805 910529 DDOT Loop Summary 10-Aug-1993 10:08:55 Page 20
-
-
- Loop Summary
-
- From To Loop Loop at Unroll Unroll Iteration
- Loop# line line label index nest weight factor workload Status
- 1 490 494 Do 10 I 1 9 4 unrolled
- 2 490 494 Do 10 I 1 9 4 unrolled
- 3 501 503 Do 30 I 1 5 4 unrolled
- 4 501 503 Do 30 I 1 5 4 unrolled
-
- PFA/SGI 10.0 k092805 910529 DSCAL Source 10-Aug-1993 10:08:55 Page 21
-
- Footnotes Actions DO Loops Line
-
- 507 subroutine dscal(n,da,dx,incx)
- 508 c
- 509 c scales a vector by a constant.
- 510 c jack dongarra, linpack, 3/11/78.
- 511 c
- 512 double precision da,dx(1)
- 513 integer i,incx,m,mp1,n,nincx
- 514 c
- SO 515 if(n.le.0)return
- SO 516 if(incx.eq.1)go to 20
- 517 c
- 518 c code for increment not equal to 1
- 519 c
- 1 SO 520 nincx = n*incx
- SO C +--------- 521 do 10 i = 1,nincx,incx
- * 522 dx(i) = da*dx(i)
- *_________ 523 10 continue
- 524 return
- 525 c
- 526 c code for increment equal to 1
- 527 c
- 528 20 continue
- 2 3 SO C +--------- 529 do 30 i = 1,n
- * 530 dx(i) = da*dx(i)
- *_________ 531 30 continue
- 532 return
- 533 end
-
-
- Abbreviations Used
- SO scalar optimization
- C concurrentized
-
-
- Footnote List
- 1: scalar optimization Statement deleted because of scalar optimization.
- 2: scalar optimization Loop unrolled 4 times to improve scalar performance.
- 3: scalar optimization Cleanup loop for loop unrolling.
-
- PFA/SGI 10.0 k092805 910529 DSCAL Loop Summary 10-Aug-1993 10:08:55 Page 22
-
-
- Loop Summary
-
- From To Loop Loop at Unroll Unroll Iteration
- Loop# line line label index nest weight factor workload Status
- 1 521 523 Do 10 I 1 4 4 4 concurrentized
- 2 529 531 Do 30 I 1 4 4 scalar mode preferable
- 3 529 531 Do 30 I 1 4 4 4 concurrentized
-
- PFA/SGI 10.0 k092805 910529 IDAMAX Source 10-Aug-1993 10:08:55 Page 23
-
- Footnotes Actions DO Loops Line
-
- 534 integer function idamax(n,dx,incx)
- 535 c
- 536 c finds the index of element having max. absolute value.
- 537 c jack dongarra, linpack, 3/11/78.
- 538 c
- 539 double precision dx(1),dmax
- 540 integer i,incx,ix,n
- 541 c
- 542 idamax = 0
- SO 543 if( n .lt. 1 ) return
- 544 idamax = 1
- SO 545 if(n.eq.1)return
- SO 546 if(incx.eq.1)go to 20
- 547 c
- 548 c code for increment not equal to 1
- 549 c
- 1 SO 550 ix = 1
- 551 dmax = dabs(dx(1))
- SO 552 ix = ix + incx
- 2 3 SO +--------- 553 do 10 i = 2,n
- 4 DD SO ! 554 if(dabs(dx(ix)).le.dmax) go to 5
- 5 DD ! 555 idamax = i
- 4 DD SO ! 556 dmax = dabs(dx(ix))
- SO ! 557 5 ix = ix + incx
- !_________ 558 10 continue
- 559 return
- 560 c
- 561 c code for increment equal to 1
- 562 c
- 563 20 dmax = dabs(dx(1))
- 2 3 SO +--------- 564 do 30 i = 2,n
- 4 DD ! 565 if(dabs(dx(i)).le.dmax) go to 30
- 5 DD ! 566 idamax = i
- 4 DD ! 567 dmax = dabs(dx(i))
- !_________ 568 30 continue
- 569 return
- 570 end
-
-
- Abbreviations Used
- DD data dependence
- SO scalar optimization
-
-
- Footnote List
- 1: scalar optimization Statement deleted because of scalar optimization.
- 2: scalar optimization Loop unrolled 4 times to improve scalar performance.
- 3: scalar optimization Cleanup loop for loop unrolling.
- 4: data dependence Data dependence involving this line due to variable "DMAX".
- 5: data dependence Data dependence involving this line due to variable "IDAMAX".
-
- PFA/SGI 10.0 k092805 910529 IDAMAX Loop Summary 10-Aug-1993 10:08:55 Page 24
-
-
- Loop Summary
-
- From To Loop Loop at Unroll Unroll Iteration
- Loop# line line label index nest weight factor workload Status
- 1 553 558 Do 10 I 1 12 4 unrolled
- 2 553 558 Do 10 I 1 12 4 unrolled
- 3 564 568 Do 30 I 1 10 4 unrolled
- 4 564 568 Do 30 I 1 10 4 unrolled
-
- PFA/SGI 10.0 k092805 910529 EPSLON Source 10-Aug-1993 10:08:55 Page 25
-
- Footnotes Actions DO Loops Line
-
- 571 double precision function epslon (x)
- 572 double precision x
- 573 c
- 574 c estimate unit roundoff in quantities of size x.
- 575 c
- 576 double precision a,b,c,eps
- 577 c
- 578 c this program should function properly on all systems
- 579 c satisfying the following two assumptions,
- 580 c 1. the base used in representing floating point
- 581 c numbers is not a power of three.
- 582 c 2. the quantity a in statement 10 is represented to
- 583 c the accuracy used in floating point variables
- 584 c that are stored in memory.
- 585 c the statement number 10 and the go to 10 are intended to
- 586 c force optimizing compilers to generate code satisfying
- 587 c assumption 2.
- 588 c under these assumptions, it should be true that,
- 589 c a is not exactly equal to four-thirds,
- 590 c b has a zero for its last bit or digit,
- 591 c c is not exactly equal to one,
- 592 c eps measures the separation of 1.0 from
- 593 c the next larger floating point number.
- 594 c the developers of eispack would appreciate being informed
- 595 c about any systems where these assumptions do not hold.
- 596 c
- 597 c *****************************************************************
- 598 c this routine is one of the auxiliary routines used by eispack iii
- 599 c to avoid machine dependencies.
- 600 c *****************************************************************
- 601 c
- 602 c this version dated 4/6/83.
- 603 c
- 604 a = 4.0d0/3.0d0
- 605 10 b = a - 1.0d0
- 606 c = b + b + b
- 607 eps = dabs(c-1.0d0)
- 608 if (eps .eq. 0.0d0) go to 10
- 609 epslon = eps*dabs(x)
- 610 return
- 611 end
-
- PFA/SGI 10.0 k092805 910529 EPSLON Loop Summary 10-Aug-1993 10:08:55 Page 26
-
-
- Loop Summary
-
- From To Loop Loop at Unroll Unroll Iteration
- Loop# line line label index nest weight factor workload Status
- 1 605 608 Do 1 optimization disabled
-
- PFA/SGI 10.0 k092805 910529 DMXPY Source 10-Aug-1993 10:08:55 Page 27
-
- Footnotes Actions DO Loops Line
-
- 612 subroutine dmxpy (n1, y, n2, ldm, x, m)
- 613 double precision y(*), x(*), m(ldm,*)
- 614 c
- 615 c purpose:
- 616 c multiply matrix m times vector x and add the result to vector y.
- 617 c
- 618 c parameters:
- 619 c
- 620 c n1 integer, number of elements in vector y, and number of rows in
- 621 c matrix m
- 622 c
- 623 c y double precision(n1), vector of length n1 to which is added
- 624 c the product m*x
- 625 c
- 626 c n2 integer, number of elements in vector x, and number of columns
- 627 c in matrix m
- 628 c
- 629 c ldm integer, leading dimension of array m
- 630 c
- 631 c x double precision(n2), vector of length n2
- 632 c
- 633 c m double precision(ldm,n2), matrix of n1 rows and n2 columns
- 634 c
- 635 c ----------------------------------------------------------------------
- 636 c
- 637 c cleanup odd vector
- 638 c
- 1 SO 639 j = mod(n2,2)
- SO 640 if (j .ge. 1) then
- 2 3 SO C +--------- 641 do 10 i = 1, n1
- SO * 642 y(i) = (y(i)) + x(j)*m(i,j)
- *_________ 643 10 continue
- 644 endif
- 645 c
- 646 c cleanup odd group of two vectors
- 647 c
- 1 SO 648 j = mod(n2,4)
- SO 649 if (j .ge. 2) then
- 2 3 SO C +--------- 650 do 20 i = 1, n1
- SO * 651 y(i) = ( (y(i))
- * 652 $ + x(j-1)*m(i,j-1)) + x(j)*m(i,j)
- *_________ 653 20 continue
- 654 endif
- 655 c
- 656 c cleanup odd group of four vectors
- 657 c
- 1 SO 658 j = mod(n2,8)
- SO 659 if (j .ge. 4) then
- 2 3 SO C +--------- 660 do 30 i = 1, n1
- SO * 661 y(i) = ((( (y(i))
- * 662 $ + x(j-3)*m(i,j-3)) + x(j-2)*m(i,j-2))
-
- PFA/SGI 10.0 k092805 910529 DMXPY Source 10-Aug-1993 10:08:55 Page 28
-
- * 663 $ + x(j-1)*m(i,j-1)) + x(j) *m(i,j)
- *_________ 664 30 continue
- 665 endif
- 666 c
- 667 c cleanup odd group of eight vectors
- 668 c
- 1 SO 669 j = mod(n2,16)
- SO 670 if (j .ge. 8) then
- 3 4 SO C +--------- 671 do 40 i = 1, n1
- SO * 672 y(i) = ((((((( (y(i))
- * 673 $ + x(j-7)*m(i,j-7)) + x(j-6)*m(i,j-6))
- * 674 $ + x(j-5)*m(i,j-5)) + x(j-4)*m(i,j-4))
- * 675 $ + x(j-3)*m(i,j-3)) + x(j-2)*m(i,j-2))
- * 676 $ + x(j-1)*m(i,j-1)) + x(j) *m(i,j)
- *_________ 677 40 continue
- 678 endif
- 679 c
- 680 c main loop - groups of sixteen vectors
- 681 c
- SO 682 jmin = j+16
- LR +--------- 683 do 60 j = jmin, n2, 16
- LR C !+-------- 684 do 50 i = 1, n1
- 5 DD !* 685 y(i) = ((((((((((((((( (y(i))
- !* 686 $ + x(j-15)*m(i,j-15)) + x(j-14)*m(i,j-14))
- !* 687 $ + x(j-13)*m(i,j-13)) + x(j-12)*m(i,j-12))
- !* 688 $ + x(j-11)*m(i,j-11)) + x(j-10)*m(i,j-10))
- !* 689 $ + x(j- 9)*m(i,j- 9)) + x(j- 8)*m(i,j- 8))
- !* 690 $ + x(j- 7)*m(i,j- 7)) + x(j- 6)*m(i,j- 6))
- !* 691 $ + x(j- 5)*m(i,j- 5)) + x(j- 4)*m(i,j- 4))
- !* 692 $ + x(j- 3)*m(i,j- 3)) + x(j- 2)*m(i,j- 2))
- !* 693 $ + x(j- 1)*m(i,j- 1)) + x(j) *m(i,j)
- !*________ 694 50 continue
- !_________ 695 60 continue
- 696 return
- 697 end
-
-
- Abbreviations Used
- LR loop reordering
- DD data dependence
- SO scalar optimization
- C concurrentized
-
-
- Footnote List
- 1: scalar optimization Statement deleted because of scalar optimization.
- 2: scalar optimization Loop unrolled 4 times to improve scalar performance.
- 3: scalar optimization Cleanup loop for loop unrolling.
- 4: scalar optimization Loop unrolled 2 times to improve scalar performance.
- 5: data dependence Data dependence involving this line due to variable "Y".
-
- PFA/SGI 10.0 k092805 910529 DMXPY Loop Summary 10-Aug-1993 10:08:55 Page 29
-
-
- Loop Summary
-
- From To Loop Loop at Unroll Unroll Iteration
- Loop# line line label index nest weight factor workload Status
- 1 641 643 Do 10 I 1 7 4 scalar mode preferable
- 2 641 643 Do 10 I 1 7 4 12 concurrentized
- 3 650 653 Do 20 I 1 12 4 scalar mode preferable
- 4 650 653 Do 20 I 1 12 4 23 concurrentized
- 5 660 664 Do 30 I 1 22 4 scalar mode preferable
- 6 660 664 Do 30 I 1 22 4 45 concurrentized
- 7 671 677 Do 40 I 1 42 2 scalar mode preferable
- 8 671 677 Do 40 I 1 42 2 89 concurrentized
- 9 684 694 Do 50 I 1 concurrentized
- 10 683 695 Do 60 J 2 111 1 already in a parallel loop
-
- PFA/SGI 10.0 k092805 910529 _MAIN Source 10-Aug-1993 10:08:55 Page 30
-
- Footnotes Actions DO Loops Line
-
- 698
- 699
-